********************************************************************************
** Cederman, Galano, Girardin and Schvitz. War Did Make States.
** Article prepared for International Organization
** June 20, 2022
**
** Stata do-file: data_prep1.do
** Second data preparation file for state-level data
** Required file paths set in runall.do
********************************************************************************

// Move to the intermediate-data directory. Both globals are set in runall.do;
// the second cd is issued from inside $ROOT. The paths are quoted so that
// directory names containing spaces are handled correctly.
cd "$ROOT"
cd "$INTERMEDIATEDIR"
use "statedata_intermediate.dta", clear

// Attach the two auxiliary datasets on the (id, year) key.
// nogenerate suppresses the _merge bookkeeping variable, which was previously
// created and dropped immediately after each merge. Unmatched observations
// from either side are kept, exactly as before.
merge 1:1 id year using "dyad_cumul", nogenerate
merge 1:1 id year using "cumulneigh", nogenerate


** Remove phantom years
// "Phantom years are those preceding or succeeding the lifespan of a state"
// They are used to compute size changes etc. but should not be used in analysis
// Phantom years are flagged by the string variables willborn / hasdied
drop if willborn == "true" | hasdied == "true"

// Throw away the existing death / duration variables; they are rebuilt below
// on the sample that no longer contains phantom years
drop death finaldeath deaths deaths1 lifeyears spline*

// death: the state has no observation five time units ahead (years before 1790 only)
gen death = (id!=. & f5.id==. & year<1790)
// finaldeath: the observation's year equals maxyear (years before 1790 only)
gen finaldeath = (year == maxyear & year<1790)

// Death history: running count of deaths within each state, plus a
// "has died at least once so far" indicator
sort id year
by id: gen deaths = sum(death)
gen deaths1 = (deaths > 0 & deaths != .)

// Duration dependence for the lifespan of a state.
// btscs leaves its spline terms in _spline1-_spline3; strip the leading underscore.
btscs death year id, gen(lifeyears) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' spline`k'
}


// Compute variables for regression analysis

// Interaction terms for the regression analysis.
// The warX* variables multiply by inc1, the peaceX* variables by (1-inc1).
gen warXsize = inc1*llarea

local changevars wargrowth peacegrowth warshrink peaceshrink

// inc1 x each ll* territorial-change variable
foreach chg of local changevars {
    gen warX`chg' = inc1*ll`chg'
}

// (1-inc1) x each ll* territorial-change variable
foreach chg of local changevars {
    gen peaceX`chg' = (1-inc1)*ll`chg'
}


// onsetinit1: 1 when onsetinitiatorbrecke is positive and not missing, else 0
gen onsetinit1 = (onsetinitiatorbrecke>0 & onsetinitiatorbrecke!=.)

// Duration-dependence terms for onsetinit1 (several such sets are built in
// this file; not all of them are used).
// btscs writes its splines to _spline1-_spline3, so rename them right away
// before the next btscs call needs those names again.
btscs onsetinit1 year id, gen(pys) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' pyspline`k'
}

// onsetinitside1: onsetinit1, additionally switched on when onsetfollowerbrecke equals 1
gen onsetinitside1 = (onsetinit1 == 1 | onsetfollowerbrecke == 1)

// Duration-dependence terms for onsetinitside1
// (the target names are pys2pline#, spelled exactly as used elsewhere)
btscs onsetinitside1 year id, gen(pys2) nspline(3)
forvalues k = 1/3 {
    rename _spline`k' pys2pline`k'
}

// incinit1: 1 when inc1 is positive and incidenceinitiatorbrecke is positive
// and not missing, else 0
gen incinit1 = (inc1>0 & incidenceinitiatorbrecke > 0 & incidenceinitiatorbrecke !=.)

// incinitside1: incinit1, additionally switched on when incidencefollowerbrecke
// is positive and not missing
gen incinitside1 = (incinit1 == 1 | (incidencefollowerbrecke > 0 & incidencefollowerbrecke !=.))

// attacked1: a war onset is recorded for the state-year (onsetbrecke > 0) while
// the state is flagged neither by onsetinit1 nor by onsetinitside1.
// Stata treats missing values as larger than any number, so an unguarded
// "onsetbrecke>0" would also be true when onsetbrecke is missing. The explicit
// !missing() guard keeps such observations at 0, in line with the other
// indicators in this file, which guard their comparisons with "!=.".
gen attacked1 = 0
replace attacked1 = 1 if onsetbrecke>0 & !missing(onsetbrecke) & onsetinit1==0 & onsetinitside1==0

// Duration-dependence terms for attacked1; btscs leaves its splines in
// _spline1-_spline3, which are renamed to pyaspline1-pyaspline3.
btscs attacked1 year id, gen(pyas) nspline(3)
rename _spline1  pyaspline1
rename _spline2  pyaspline2
rename _spline3  pyaspline3

// incattacked1: same construction on the incidence variables, with the same
// missing-value guard on incidencebrecke.
gen incattacked1 = 0
replace incattacked1 = 1 if incidencebrecke>0 & !missing(incidencebrecke) & incinit1==0 & incinitside1==0

///////////////////////////////////////////////////////////////////////////////////////////
// cumulative counters for number of war years

// For each source indicator build three variables:
//   <name>    running within-state sum of the indicator, in year order
//   l<name>   that sum lagged by five time units
//   ll<name>  log of (lagged sum + 1)
// NOTE(review): the l5. operator is used before the xtset call further down,
// so the data are presumably already declared as a panel when loaded - confirm.
local sources inc1 incidencebrecke onsetinit1
local targets warsum1 warsum onsetinitsum

forvalues i = 1/3 {
    local src : word `i' of `sources'
    local tgt : word `i' of `targets'

    bysort id (year): gen `tgt' = sum(`src')
    gen l`tgt' = l5.`tgt'
    gen ll`tgt' = log(l`tgt' + 1)
}


///////////////////////////////////////////////////////////////////////////////////////////
/*
gen lldwin_wargains = log(dwin_wargains+1)
gen lldwin_warlosses = log(dwin_warlosses+1)
gen lldwin_peacegains = log(dwin_peacegains+1)
gen lldwin_peacelosses = log(dwin_peacelosses+1)

gen dwin_netwargains = dwin_wargains-dwin_warlosses
replace dwin_netwargains = 0 if dwin_netwargains<0
gen lldwin_netwargains = log(dwin_netwargains+1)
gen dwin_netwarlosses = dwin_warlosses-dwin_wargains
replace dwin_netwarlosses = 0 if dwin_netwarlosses<0
gen lldwin_netwarlosses = log(dwin_netwarlosses+1)

gen dwin_netpeacegains = dwin_peacegains-dwin_peacelosses
replace dwin_netpeacegains = 0 if dwin_netpeacegains<0
gen lldwin_netpeacegains = log(dwin_netpeacegains+1)
gen dwin_netpeacelosses = dwin_peacelosses-dwin_peacegains
replace dwin_netpeacelosses = 0 if dwin_netpeacelosses<0
gen lldwin_netpeacelosses = log(dwin_netpeacelosses+1)


bys year: egen warring = sum(inc1)
*/

// Declare the panel structure (id = panel, year = time) for the lag operators below
xtset id year

// Log of (coastmin lagged five time units + 1)
gen llcoastdist = log(l5.coastmin + 1)

// Dummy: lagged coastmin below 10. A missing lag does not satisfy "< 10",
// so the dummy is 0 (not missing) where the lag is unavailable.
gen lcoastal = (l5.coastmin < 10)

*gen lwarring = l5.warring
*gen lsharewarring = (l5.warring/l5.n)

// Missing elevationsd is recoded to 0 before the lagged log transform
replace elevationsd = 0 if elevationsd == .
gen llelevsd = log(1 + l5.elevationsd)


// All main datasets have been prepared for analysis
// We are now ready to run analysis do-files

// Return to the intermediate-data directory (globals set in runall.do) and
// write the analysis dataset, overwriting any existing copy. The paths are
// quoted so that directory names containing spaces are handled correctly.
cd "$ROOT"
cd "$INTERMEDIATEDIR"
save "statedata.dta", replace
